*******************************************************************************
*Replication script for study 1: Claassen, J., Höhne, J.K., & Kuhlmann, J. 
*(in press). Asking for feedback: Innovating final comment questions 
* in self-administered web surveys. Journal of Survey Statistics and Methodology
*******************************************************************************

*install the user-written levene package from SSC only if it is not yet
*available, so that re-running the script does not depend on network access
capture which levene
if _rc {
    ssc install levene
}

*import dataset (clear discards any data currently held in memory)
use "final_comment_study_1.dta", clear


*******************
*Sample composition
*******************

*summary statistics for age
summarize age

*one-way frequency tables: gender, education, device, and experimental group
foreach characteristic in gender high_education device group {
    tabulate `characteristic'
}

*statistical tests to evaluate the effectiveness of random assignment:
*age is compared across groups with levene and ttest
levene age, by(group)
ttest age, by(group)

*categorical characteristics are cross-tabulated against group with row
*percentages and a Pearson chi-squared test
foreach characteristic in gender high_education device {
    tabulate group `characteristic', row chi2
}


***********************
*Intercoder reliability
***********************

*topic-related comments
*agreement rate
tabulate agreement_topic_related_comment

*Cohen's kappa, restricted to subsample_topic_related_comment == 1
kap topic_related_comment topic_related_comment_coder2 ///
    if subsample_topic_related_comment == 1

*number of topics
*agreement rate
tabulate agreement_number_of_topics

*weighted Cohen's kappa (w2 weights), restricted to subsample_number_of_topics == 1
kap number_of_topics number_of_topics_coder2 ///
    if subsample_number_of_topics == 1, wgt(w2)


*******************************************************************************
*Table 1. Statistics across the single-box and list-style conditions (Study 1)
*******************************************************************************

*binary outcomes (answer provision, then topic-related comments):
*cross-tabulation by group with row percentages, followed by a
*two-group test of proportions
foreach outcome in answer_provision topic_related_comment {
    tabulate group `outcome', row
    prtest `outcome', by(group)
}

*number of topics: levene and ttest across the two groups
levene number_of_topics, by(group)
ttest number_of_topics, by(group)


****************************************
*Table 2. Regression analyses (Study 1)
****************************************

*keep only complete cases: complete_cases is 1 when none of the model
*covariates is missing and 0 otherwise
generate byte complete_cases = !missing(interest, high_education, female, age, difficulty, topic_sensitivity, smartphone)
keep if complete_cases == 1

*right-hand sides shared by all outcomes: m1 (reduced) and m2 (full)
local m1_terms i.group interest i.high_education
local m2_terms i.group interest i.high_education i.female age difficulty topic_sensitivity i.smartphone

*answer provision and topic-related comments (logit; m1 followed by m2)
foreach outcome in answer_provision topic_related_comment {
    logit `outcome' `m1_terms'
    logit `outcome' `m2_terms'
}

*number of topics (tpoisson; m1 followed by m2)
tpoisson number_of_topics `m1_terms'
tpoisson number_of_topics `m2_terms'


*************************
*Supplementary Material C
*************************

*import dataset (reloads the file from disk; clear discards the data
*currently held in memory)
use "final_comment_study_1.dta", clear

*Table C1. Sample distribution of survey interest by study
tabulate interest

*Table C2. Sample distribution of survey difficulty by study
tabulate difficulty

*Table C3. Sample distribution of topic sensitivity by study
tabulate topic_sensitivity


*************************
*Supplementary Material D
*************************

*import dataset (clear discards the data currently held in memory)
use "final_comment_study_1.dta", clear

*prepare code variables for reshape command: each topic indicator is renamed
*to var_<nn>_<name>, where <nn> is a running number in the order listed below.
*The number is zero-padded to two digits because the reshaped category is a
*string: without padding, "10_..." to "15_..." would sort before "2_..." and
*the tabulation would not follow the listed order.
local i = 1
foreach v in positive_fun positive_questions positive_results positive_other neutral_views neutral_general neutral_other negative_unclear negative_intimacy negative_questions negative_format negative_questionnaire negative_visual negative_missing negative_other {
    local padded : display %02.0f `i'
    rename `v' var_`padded'_`v'
    local ++i
}

*reshape from wide to long (i.e., one observation per code)
reshape long var_, i(id) j(category, string)
keep id group category var_

*retain only the codes that were assigned (var_ == 1)
keep if var_ == 1
tabulate var_

*Table D1. Topic proportions across single-box and list-style conditions (Study 1)
tabulate category group, col


*************************
*Supplementary Material E
*************************

*import dataset (clear discards the data currently held in memory)
use "final_comment_study_1.dta", clear

*Table E1. Frequency and proportion of item nonresponse, provided answers, and topic-related comments across studies
*(cross-tabulations by group with row percentages)
foreach outcome in answer_provision topic_related_comment {
    tabulate group `outcome', row
}


*************************
*Supplementary Material F
*************************

*import dataset: load the file explicitly, as the other supplementary sections
*do, so that this section does not depend on what a previous section left in
*memory (clear discards the data currently held in memory)
use "final_comment_study_1.dta", clear

*keep only complete cases: complete_cases is 1 when none of the model
*covariates is missing and 0 otherwise
generate byte complete_cases = !missing(interest, high_education, female, age, difficulty, topic_sensitivity, smartphone)
keep if complete_cases == 1

*Table F1. Regression analyses with interaction terms (Study 1)

*three right-hand sides; each interacts one respondent characteristic with
*group and enters the remaining covariates as main effects
local x_education interest i.female age difficulty topic_sensitivity i.smartphone i.high_education##i.group
local x_female interest i.high_education age difficulty topic_sensitivity i.smartphone i.female##i.group
local x_age interest i.high_education i.female difficulty topic_sensitivity i.smartphone c.age##i.group

*answer provision and topic-related comment (logit)
foreach outcome in answer_provision topic_related_comment {
    logit `outcome' `x_education'
    logit `outcome' `x_female'
    logit `outcome' `x_age'
}

*number of topics (tpoisson)
tpoisson number_of_topics `x_education'
tpoisson number_of_topics `x_female'
tpoisson number_of_topics `x_age'